version 17.0
clear all
cd "MYPATH\derived\build_model_sample"
adopath + ../../ado/

cap log close
log using "clean_sample.log", replace

preliminaries
foreach PATH in RESULTS TEMP {
	cap mkdir "${`PATH'}\derived"
	cap mkdir "${`PATH'}\derived\build_model_sample"
    if "`PATH'" != "TEMP" cap mkdir "${`PATH'}\derived\build_model_sample\figures"
}

graph set window fontface default
graph set ps fontface default
graph set window fontfacemono "Consolas"
graph set ps fontfacemono "Consolas"


program main 
	prep_alldata	 
    rm $TEMP\derived\build_model_sample\mfr_analysis_plus_issues_mis_LISA_kids.dta
	cap log close
end


capture program drop prep_data 

program prep_alldata
***********************************
	di "load and clean data"
	***********************************
	use $TEMP\derived\build_model_sample\mfr_analysis_plus_issues_mis_LISA_kids.dta, clear 
	qui keep lopnr pregnancy pregid_full main_flag ///
	  wave lan date_preg year month nipt_51_200 nipt_hr_cov kub_type ///
	  did_invasive did_nipt subt kub_score fetus_risk  ///
	  dv_prev_mis dv_prev_mis_hdia prev_stillbirth ///
	  prev_death_28 prev_preterm_live prev_vpreterm_live prev_chrom_ab ///
	  prev_congen_deform prev_pnatal_issue* dv_prev_kids num_prev_kids ///
	  age educ hh_AGI_inc hh_inc_smooth inc_rank inc_quartile ///
	  married married_prev ///
	  mother_birth_date mom_foreign ///
	  carried_22 no_live live_birth stillbirth healthy chroma ///
	  chroma_detect chroma_other ///
	  birth_flag age_flag date_preg_flag due_dt_year_educ impute_birth_month_discrep
	
	rename married due_dt_year_married
	rename married_prev married
	replace mom_foreign = 1 * mom_foreign == 2 
	label def foreignlab 0 "Swedish" 1 "Foreign"
	label val mom_foreign foreignlab
	gen prev_no_live = 1 *(dv_prev_mis == 1 | prev_stillbirth == 1)
	gen prev_any_q_icd = 1 * (prev_congen_deform == 1 | prev_chrom_ab == 1)
	gen prev_concern = 1 * (dv_prev_mis == 1 | dv_prev_mis_hdia == 1 | ///
	  prev_stillbirth == 1 | prev_death_28 == 1 | prev_preterm_live == 1) 
	label var prev_concern "Prev miscarriage/stillbirth/death within 28/preterm live"
	gen prev_birth_issue = 1 * (prev_concern == 1 | prev_any_q_icd == 1)
	* Set missings of binary X's to 0
	foreach var in mom_foreign dv_prev_mis prev_stillbirth prev_death_28 ///
	  prev_preterm_live prev_vpreterm_live prev_chrom_ab ///
	  prev_congen_deform prev_pnatal_issue dv_prev_kids due_dt_year_married married {
		replace `var' = 0 if mi(`var')
	}
	gen prev_mis_still = 1 * (dv_prev_mis == 1 | dv_prev_mis_hdia | prev_stillbirth == 1) 
	foreach num of numlist 1/12 {
		local var "prev_pnatal_issue`num'"
		replace `var' = 0 if mi(`var')
	}
	
	* All live births sample 
	preserve 
	keep if birth_flag == 1 | birth_flag == 4 
	save $TEMP\derived\build_model_sample\all_live_births.dta, replace
	restore 
	
	* All live births in universal NT region months sample 
	preserve 
	keep if birth_flag == 1 | birth_flag == 2 
	keep if kub_type == 2
	save $TEMP\derived\build_model_sample\all_live_universalnt.dta, replace
	restore 
	
	* All NT screens in Universal NT region-months sample
	di "all NT screens in universal NT sample"
	keep if main_flag == 1
	count 
	assert wave == 2 | wave == 3
	assert kub_type == 2
	save $DATA\all_screens_universalnt_w_xs.dta, replace

	* Model sample 
	qui keep if fetus_risk <= 1000
	di "model sample size"
	count
	prep_data_for_model
	save $DATA\model_sample.dta, replace
end 
	  
program prep_data_for_model 
	*calc average KUB by age
	di "calc average KUB by age"
	egen ave_kub_age = mean(kub_score), by(age)

	*assumptions 
	qui gen p_i = kub_score 
		la var p_i "probability of chrom. abn., after KUB test"
		
	*mother's age at expected conception date (for Blekinge)
	la var mother_birth_date "mother's date of birth"
	qui gen expcon = date_preg - 280
		la var expcon "predicted conception date (date_preg - 280)"
		format expcon %td 
	qui personage mother_birth_date expcon, gen(age_expcon)
		la var age_expcon "mother's age at predicted conception date"
	
	*Set out-of-pocket cost for NIPT 
	* cost = 5000 SEK ~= 567.50 USD 
	qui gen oop_i = 567.5
		qui replace oop_i = . if wave==2 /* infinity */
		qui replace oop_i = 0 if wave==3 & lan==1 & inrange(fetus_risk, 51, 200) /* Stockholm */
		qui replace oop_i = 0 if wave==3 & lan==3 & inrange(fetus_risk, 2, 200) /* Uppsala */
		qui replace oop_i = 0 if wave==3 & lan==4 & inrange(fetus_risk, 51, 200) /* Sormland / Sodermanland */
		qui replace oop_i = 0 if wave==3 & lan==5 & inrange(fetus_risk, 51, 300) /* Ostergotland */
		qui replace oop_i = 0 if wave==3 & lan==6 & inrange(fetus_risk, 51, 300) /* Jonkoping */
		qui replace oop_i = 0 if wave==3 & lan==7 & inrange(fetus_risk, 51, 1000) /* Kronoberg */
		qui replace oop_i = 0 if wave==3 & lan==8 & inrange(fetus_risk, 51, 300) /* Kalmar */ 
		qui replace oop_i = 0 if wave==3 & lan==9 & inrange(fetus_risk, 51, 200) /* Gotland */
		qui replace oop_i = 0 if wave==3 & lan==10 & age_expcon >= 32 /* Blekinge */
		qui replace oop_i = 0 if wave==3 & lan==12 & inrange(fetus_risk, 51, 1000) /* Skane */
		qui replace oop_i = 0 if wave==3 & lan==13 & inrange(fetus_risk, 51, 200) /* Halland */
		qui replace oop_i = 0 if wave==3 & lan==14 & inrange(fetus_risk, 51, 200) /* Vastra Gotaland */
		qui replace oop_i = 0 if wave==3 & lan==17 & inrange(fetus_risk, 2, 200) /* Varmland */ 
		qui replace oop_i = 0 if wave==3 & lan==18 & inrange(fetus_risk, 2, 1000) /* Orebro */
		qui replace oop_i = 0 if wave==3 & lan==19 & inrange(fetus_risk, 51, 200) /* Vastmanland */
		qui replace oop_i = 0 if wave==3 & lan==20 & inrange(fetus_risk, 51, 200) /* Dalarna */
		qui replace oop_i = 0 if wave==3 & lan==21 & inrange(fetus_risk, 2, 200) /* Gavleborg */
		qui replace oop_i = 0 if wave==3 & lan==22 & inrange(fetus_risk, 2, 200) /* Vastenorrland */
		qui replace oop_i = 0 if wave==3 & lan==23 & inrange(fetus_risk, 51, 200) /* Jamtland */
		*qui replace oop_i = 0 if wave==3 & lan==24 & ??? /* Vasterbotten - never transitioned to wave 3 */
		*qui replace oop_i = 0 if wave==3 & lan==25 & ??? /* Norbotten - never transitioned to wave 3 */
		
	di "oop_i for NIPT vs. wave"
	tab oop_i wave, mi
	
	*Calculate kub-risk bins for moments 
	local bin_size = 50
	qui gen bin_number = .
	local count = 1
	forval bin_max = `bin_size'(`bin_size')1000 {
		local bin_inf = `bin_max' - `bin_size'
		qui replace bin_number = `count' if fetus_risk <= `bin_max' & fetus_risk > `bin_inf'
		local count = `count' + 1
	}
	
	* Generate policy_regime for NIPT moments
	qui gen policy_regime = 1 if nipt_51_200 & wave == 3
	qui replace policy_regime = 2 if inlist(lan,5,6,8) & wave == 3 // ( 51_300) not in sample
	qui replace policy_regime = 3 if inlist(lan,7,12) & wave == 3 // (51_1000)
	qui replace policy_regime = 4 if inlist(lan, 3, 17, 21, 22) & wave == 3 // hr_cov
	qui replace policy_regime = 5 if inlist(lan,18) & wave == 3 // orebro (all)
	qui replace policy_regime = 6 if inlist(lan,10) & wave == 3 // blekinge (not in sample)
	qui replace policy_regime = 0 if wave == 2
	
	* Generate policy id for variance moments
	local count = 1
	qui gen policy_id = .
	forval rgm = 0/6 {
	    qui replace policy_id = `count' if policy_regime == `rgm' & fetus_risk > 200
		qui replace policy_id = `count' + 1 if policy_regime == `rgm' & fetus_risk >= 51 & fetus_risk <= 200
		qui replace policy_id = `count' + 2 if policy_regime == `rgm' & fetus_risk < 51
		local count = `count' + 3
	}
	
	assert !mi(policy_id)
	assert !mi(policy_regime)
	
	*save 
	di "save data"
	qui compress 
	sum 
end 


capture program drop fake_data 
program fake_data

	clear
	set seed 1001 
	
	di "import moments"
	set obs 20
	qui gen bin_number = _n 
	
	qui gen nipt_share = .
		qui replace nipt_share = 0.15245702 if bin_number==1
		qui replace nipt_share = 0.34691708 if bin_number==2
		qui replace nipt_share = 0.35580829 if bin_number==3
		qui replace nipt_share = 0.34231579 if bin_number==4
		qui replace nipt_share = 0.05882353 if bin_number==5
		qui replace nipt_share = 0.04150000 if bin_number==6
		qui replace nipt_share = 0.04685139 if bin_number==7
		qui replace nipt_share = 0.04196891 if bin_number==8
		qui replace nipt_share = 0.04728641 if bin_number==9
		qui replace nipt_share = 0.03965236 if bin_number==10
		qui replace nipt_share = 0.04918033 if bin_number==11
		qui replace nipt_share = 0.04428044 if bin_number==12
		qui replace nipt_share = 0.05295192 if bin_number==13
		qui replace nipt_share = 0.04943419 if bin_number==14
		qui replace nipt_share = 0.04643766 if bin_number==15
		qui replace nipt_share = 0.04139290 if bin_number==16
		qui replace nipt_share = 0.04051317 if bin_number==17
		qui replace nipt_share = 0.04019074 if bin_number==18
		qui replace nipt_share = 0.03840782 if bin_number==19
		qui replace nipt_share = 0.04731183 if bin_number==20

	qui gen invY_share = .
		qui replace invY_share = 0.06317138 if bin_number==1
		qui replace invY_share = 0.02345371 if bin_number==2
		qui replace invY_share = 0.01794885 if bin_number==3
		qui replace invY_share = 0.01562970 if bin_number==4
		qui replace invY_share = 0.01445393 if bin_number==5
		qui replace invY_share = 0.01361304 if bin_number==6
		qui replace invY_share = 0.01299124 if bin_number==7
		qui replace invY_share = 0.01262130 if bin_number==8
		qui replace invY_share = 0.01230326 if bin_number==9
		qui replace invY_share = 0.01206611 if bin_number==10
		qui replace invY_share = 0.01186528 if bin_number==11
		qui replace invY_share = 0.01170166 if bin_number==12
		qui replace invY_share = 0.01157518 if bin_number==13
		qui replace invY_share = 0.01145506 if bin_number==14
		qui replace invY_share = 0.01135476 if bin_number==15
		qui replace invY_share = 0.01125989 if bin_number==16
		qui replace invY_share = 0.01118491 if bin_number==17
		qui replace invY_share = 0.01111753 if bin_number==18
		qui replace invY_share = 0.01105952 if bin_number==19
		qui replace invY_share = 0.01100475 if bin_number==20
		
	qui gen invN_share = .
		qui replace invN_share = 0.999590248 if bin_number==1
		qui replace invN_share = 0.932718394 if bin_number==2
		qui replace invN_share = 0.585119798 if bin_number==3
		qui replace invN_share = 0.215749040 if bin_number==4
		qui replace invN_share = 0.044421488 if bin_number==5
		qui replace invN_share = 0.015127804 if bin_number==6
		qui replace invN_share = 0.003699789 if bin_number==7
		qui replace invN_share = 0 if bin_number==8
		qui replace invN_share = 0 if bin_number==9
		qui replace invN_share = 0 if bin_number==10
		qui replace invN_share = 0 if bin_number==11
		qui replace invN_share = 0 if bin_number==12
		qui replace invN_share = 0 if bin_number==13
		qui replace invN_share = 0 if bin_number==14
		qui replace invN_share = 0 if bin_number==15
		qui replace invN_share = 0 if bin_number==16
		qui replace invN_share = 0 if bin_number==17
		qui replace invN_share = 0 if bin_number==18
		qui replace invN_share = 0 if bin_number==19
		qui replace invN_share = 0 if bin_number==20
	
	di "set N per bin"
	qui expand 5759 if bin_number==1 
	qui expand 2822 if bin_number==2 
	qui expand 2462 if bin_number==3 
	qui expand 2375 if bin_number==4 
	qui expand 2057 if bin_number==5 
	qui expand 2000 if bin_number==6 
	qui expand 1985 if bin_number==7 
	qui expand 1930 if bin_number==8 
	qui expand 1861 if bin_number==9
	qui expand 1841 if bin_number==10
	qui expand 1830 if bin_number==11
	qui expand 1626 if bin_number==12
	qui expand 1643 if bin_number==13
	qui expand 1679 if bin_number==14
	qui expand 1572 if bin_number==15
	qui expand 1522 if bin_number==16
	qui expand 1481 if bin_number==17
	qui expand 1468 if bin_number==18
	qui expand 1432 if bin_number==19
	qui expand 1395 if bin_number==20
	tab bin_number 
	
	di "NIPT decision"
	by bin_number, sort: gen wi_num = _n 
	egen bin_N = max(wi_num), by(bin_number)
	qui gen bin_nipt = bin_N * nipt_share 
	qui gen did_nipt = (wi_num <= bin_nipt)
	qui drop wi_num bin_N bin_nipt 
	
	di "(Invasive | yes NIPT) decision"
	preserve 
	keep if did_nipt==1 
	by bin_number, sort: gen wi_num = _n 
	egen bin_N = max(wi_num), by(bin_number)
	qui gen bin_invY = bin_N * invY_share 
	qui gen did_invasive = (wi_num <= bin_invY)
	qui drop wi_num bin_N bin_invY 
	tempfile fakeY
	save `fakeY', replace 
	restore 
	
	di "(Invasive | no NIPT) decision"
	keep if did_nipt==0
	by bin_number, sort: gen wi_num = _n 
	egen bin_N = max(wi_num), by(bin_number)
	qui gen bin_invN = bin_N * invN_share 
	qui gen did_invasive = (wi_num <= bin_invN)
	qui drop wi_num bin_N bin_invN
	append using `fakeY'
	
	sort bin_number did_nipt did_invasive 
	count 
	
	di "Generate p_i - assume risk uniformly distributed within bin" 
	qui gen risk_max = bin_number*50 
	qui gen risk_min = (bin_number-1)*50+1
		qui replace risk_min = 2 if bin_number==1 
	qui gen risk_i = runiformint(risk_min, risk_max)
	qui gen p_i = 1/risk_i 
	sum p_i 
	qui drop risk* 
	
	di "Generate oop_i - requires a bunch of assumptions; try to get somewhat close to actual data"
	qui gen oop_i = . 
	
		di "first, assume 75% of did_nipt=0 are in wave 2; all others are in wave 3"
		di "also assume 18% of did_nipt=1 are in wave 2; all others are in wave 3"
		qui gen wave2 = rbinomial(1,0.75) if did_nipt==0
		qui gen wave = 2 if wave2==1
		qui drop wave2 
		qui gen wave2 = rbinomial(1,0.18) if did_nipt==1
		qui replace wave = 2 if wave2==1
		qui drop wave2 
		qui replace wave = 3 if wave==. 
	
		di "second, of those in wave 3 with 1/200 <= p_i < 1/51, set oop=0"
		qui replace oop_i = 0 if wave==3 & 1/200 <= p_i & p_i < 1/51
		
		di "third, of those in wave 3 with p_i >= 1/51, set oop=0 10% of time"
		qui gen temp = rbinomial(1,0.1) if wave==3 & p_i >= 1/51
		qui replace oop_i = 0 if temp==1
		qui replace oop_i = 567.5 if temp==0 
		qui drop temp 
		
		di "fourth, of those in wave 3 with p_i < 1/200, set oop=0 14% of time"
		qui gen temp = rbinomial(1,0.14) if wave==3 & p_i < 1/200
		qui replace oop_i = 0 if temp==1
		qui replace oop_i = 567.5 if temp==0 
		qui drop temp 
		
	tab oop_i wave, mi 
	
	*save 
	di "save data"
	qui compress 
	qui keep p_i oop_i wave bin_number did_nipt did_invasive 
	sum 
	save $TEMP\Prenatal\baby_model_nipt\model_sample-fake.dta, replace
	
end 


capture program drop fake_data_monte 
program fake_data_monte 

	clear
	set seed 1001 

	set obs 20
	qui gen bin_number = _n 
	
	di "set number in wave2"
	qui gen wave2_N = .
		qui replace wave2_N = 3818 if bin_number==1
		qui replace wave2_N = 1841 if bin_number==2
		qui replace wave2_N = 1586 if bin_number==3
		qui replace wave2_N = 1552 if bin_number==4
		qui replace wave2_N = 1428 if bin_number==5
		qui replace wave2_N = 1321 if bin_number==6
		qui replace wave2_N = 1313 if bin_number==7
		qui replace wave2_N = 1306 if bin_number==8
		qui replace wave2_N = 1244 if bin_number==9
		qui replace wave2_N = 1248 if bin_number==10
		qui replace wave2_N = 1199 if bin_number==11
		qui replace wave2_N = 1090 if bin_number==12
		qui replace wave2_N = 1104 if bin_number==13
		qui replace wave2_N = 1125 if bin_number==14
		qui replace wave2_N = 1036 if bin_number==15
		qui replace wave2_N = 1040 if bin_number==16
		qui replace wave2_N = 979 if bin_number==17
		qui replace wave2_N = 982 if bin_number==18
		qui replace wave2_N = 954 if bin_number==19
		qui replace wave2_N = 899 if bin_number==20
		
	di "set number with oop=0 (in wave 3)"
	qui gen oop0_N = .
		qui replace oop0_N = 190 if bin_number==1
		qui replace oop0_N = 962 if bin_number==2
		qui replace oop0_N = 876 if bin_number==3
		qui replace oop0_N = 810 if bin_number==4
		qui replace oop0_N = 91 if bin_number==5
		qui replace oop0_N = 83 if bin_number==6
		qui replace oop0_N = 93 if bin_number==7
		qui replace oop0_N = 81 if bin_number==8
		qui replace oop0_N = 88 if bin_number==9
		qui replace oop0_N = 73 if bin_number==10
		qui replace oop0_N = 90 if bin_number==11
		qui replace oop0_N = 72 if bin_number==12
		qui replace oop0_N = 87 if bin_number==13
		qui replace oop0_N = 83 if bin_number==14
		qui replace oop0_N = 73 if bin_number==15
		qui replace oop0_N = 63 if bin_number==16
		qui replace oop0_N = 60 if bin_number==17
		qui replace oop0_N = 59 if bin_number==18
		qui replace oop0_N = 55 if bin_number==19
		qui replace oop0_N = 66 if bin_number==20
	
	di "set N per bin"
	qui expand 5759 if bin_number==1 
	qui expand 2822 if bin_number==2 
	qui expand 2462 if bin_number==3 
	qui expand 2375 if bin_number==4 
	qui expand 2057 if bin_number==5 
	qui expand 2000 if bin_number==6 
	qui expand 1985 if bin_number==7 
	qui expand 1930 if bin_number==8 
	qui expand 1861 if bin_number==9
	qui expand 1841 if bin_number==10
	qui expand 1830 if bin_number==11
	qui expand 1626 if bin_number==12
	qui expand 1643 if bin_number==13
	qui expand 1679 if bin_number==14
	qui expand 1572 if bin_number==15
	qui expand 1522 if bin_number==16
	qui expand 1481 if bin_number==17
	qui expand 1468 if bin_number==18
	qui expand 1432 if bin_number==19
	qui expand 1395 if bin_number==20
	tab bin_number 
	
	di "Designate wave"
	by bin_number, sort: gen wi_num = _n 
	qui gen wave = 3
		qui replace wave = 2 if wi_num <= wave2_N
	qui drop wi_num
	
	di "Designate oop_i"
	by bin_number wave, sort: gen wi_num = _n 
	qui gen oop_i = . /* infinity - wave 2 */
		qui replace oop_i = 0 if wave==3 & wi_num <= oop0_N 
		qui replace oop_i = 567.5 if wave==3 & wi_num > oop0_N 
	qui drop wi_num 
	
	di "Generate p_i - assume risk uniformly distributed within bin" 
	qui gen risk_max = bin_number*50 
	qui gen risk_min = (bin_number-1)*50+1
		qui replace risk_min = 2 if bin_number==1 
	qui gen risk_i = runiformint(risk_min, risk_max)
	qui gen p_i = 1/risk_i 
	sum p_i 
	qui drop risk* 
	
	*save 
	di "save data"
	qui compress 
	qui keep p_i oop_i wave bin_number
	sum 
	save $TEMP\baby_model_nipt\model_sample-fake-monte.dta, replace
	
end

* Execute 
main

